Let's import all the libraries first.
Import the data. This is the data in its raw form, which requires some processing before it can be analysed.
# Read the raw data from the RDS file and convert the imported object to a
# tibble.
daftdb <- as_tibble(rio::import("daftdb.rds"))
# I wasn't familiar with the {rio} package for loading data, but it is good practice to give your project a clear structure, with the data kept in its own data folder (see, for example, https://chrisvoncsefalvay.com/2018/08/09/structuring-r-projects/)
Process and clean data.
# df <- daftdb %>%
# mutate(price = iconv(enc2utf8(daftdb$price), sub = "byte")) %>%
# mutate(address = iconv(enc2utf8(daftdb$address), sub = "byte")) %>%
# mutate(sale_type = case_when(str_detect(price, "^Reserve") == TRUE ~ "Auction", TRUE ~ "Sale")) %>%
# mutate(price = str_remove_all(price, "[:alpha:]|[:]|[,]|[\u20AC]|[\u0020]")) %>%
# mutate(price = as.numeric(price)) %>%
# mutate_at(vars(price), ~ if_else(is.na(.), 0, .)) %>%
# mutate(region = str_extract(address, "Dublin [0-9]+")) %>%
# mutate(region = as.factor(region), structure = as.factor(structure)) %>%
# mutate(date = as.Date(date, format = "%m/%d/%y"))
# Build the analysis table from the raw import by overwriting/adding three
# columns:
#   * price  - number parsed out of the raw price value ("," is treated as
#              the grouping mark and "." as the decimal mark); values that
#              contain no number become NA.
#   * region - postal district extracted from the address, e.g. "Dublin 3".
#              The optional trailing "W" keeps "Dublin 6W" as its own
#              district instead of folding it into "Dublin 6". Addresses
#              without a "Dublin <number>" part get NA.
#   * date   - converted with as.Date() using the month/day/two-digit-year
#              format.
df <- daftdb %>%
  mutate(
    price = readr::parse_number(
      price,
      locale = readr::locale(decimal_mark = ".", grouping_mark = ",")
    ),
    region = str_extract(address, "Dublin [0-9]+W?"),
    date = as.Date(date, format = "%m/%d/%y")
  )

# Quick structural check of the cleaned table.
glimpse(df)
## Rows: 37,487
## Columns: 9
## $ index <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ address <chr> "138 Church Road, East Wall, Dublin 3", "44 Shanglas Road, …
## $ price <dbl> 350000, 445000, 595000, 375000, 845000, 550000, 350000, 495…
## $ bathroom <dbl> 1, 2, 2, 1, 1, 3, 1, 1, 3, 2, 2, 2, 5, 1, 1, 1, 1, 2, 2, 2,…
## $ bedroom <dbl> 3, 3, 4, 3, 4, 4, 2, 3, 4, 3, 4, 4, 6, 4, 2, 3, 3, 3, 4, 3,…
## $ structure <chr> "Terraced House", "Semi-Detached House", "Terraced House", …
## $ date <date> 2020-03-09, 2020-03-09, 2020-03-09, 2020-03-09, 2020-03-09…
## $ weblink <chr> "/dublin/houses-for-sale/east-wall/138-church-road-east-wal…
## $ region <chr> "Dublin 3", "Dublin 9", "Dublin 3", "Dublin 11", "Dublin 3"…
Mean House Price
# Weekly trend of the mean price over all rows of `df`.
# Rows are bucketed by the week their date falls in (floor_date), missing
# prices are ignored when averaging, and the mean is divided by 100,000 so
# the y axis reads in hundred thousands.
df %>%
  group_by(week = floor_date(date, "week")) %>%
  summarise(amount = mean(price, na.rm = TRUE)) %>%
  # Only x and y are mapped: a line has no fill, so `fill` is not used.
  ggplot(aes(x = week, y = amount / 100000)) +
  geom_line() +
  theme_classic() +
  labs(
    y = "Mean Price (in Hundred Thousands Euros)",
    x = "Weeks",
    title = "Trend in House Prices (mean) during First Lockdown in Dublin"
  )
# Mean price per region as a bar chart, bars sorted from the most to the
# least expensive region.
# * na.rm = TRUE: a single missing price would otherwise turn the whole
#   region's mean into NA (and is consistent with the other summaries).
# * reorder(): ggplot orders a character x axis alphabetically, so sorting
#   the rows beforehand has no effect on the plot; the ordering has to be
#   carried by the factor levels instead.
# * geom_col() draws bars whose height is the value itself.
df %>%
  group_by(region) %>%
  summarise(amount = mean(price, na.rm = TRUE)) %>%
  ggplot(aes(x = reorder(region, -amount), y = amount / 100000)) +
  geom_col() +
  theme_classic() +
  labs(
    y = "Mean Price (in Hundred Thousands Euros)",
    x = "Regions",
    title = "Mean House Prices regionwise during First Lockdown in Dublin"
  )
# Weekly mean price trend, one facet per region.
# Rows without a region are dropped before aggregating; missing prices are
# ignored when averaging. The mean is divided by 100,000 so the y axis reads
# in hundred thousands.
df %>%
  filter(!is.na(region)) %>%
  group_by(region, week = floor_date(date, "week")) %>%
  # .groups = "drop" returns an ungrouped table and silences the regrouping
  # message summarise() prints for multi-column groupings.
  summarise(amount = mean(price, na.rm = TRUE), .groups = "drop") %>%
  # Only x and y are mapped: a line has no fill, so `fill` is not used.
  ggplot(aes(x = week, y = amount / 100000)) +
  geom_line() +
  facet_wrap(~ region) +
  theme_classic() +
  labs(
    y = "Mean Price (in Hundred Thousands Euros)",
    x = "Weeks",
    title = "Trend in House Prices (mean) during First Lockdown in Dublin Regions"
  )
This chart depicts price data for County Dublin, excluding the Dublin regions shown above.
# Weekly mean price per region drawn as one coloured line per region on a
# single panel. The ggplot object is stored in `plot_df` and then handed to
# ggplotly() for rendering.
plot_df <- df %>%
  mutate(week = floor_date(date, "week")) %>%
  group_by(region, week) %>%
  summarise(amount = mean(price, na.rm = TRUE)) %>%
  # Rows whose region is missing are left out of the chart.
  filter(!is.na(region)) %>%
  ggplot(aes(week, amount / 1e5, colour = region)) +
  geom_line() +
  # Disabled overall smoother, kept for reference:
  # geom_smooth(aes(x = week, y = amount / 100000), method = "gam", inherit.aes = FALSE, color = "black") +
  theme_classic() +
  labs(
    title = "Trend in House Prices (mean) during First Lockdown in Dublin Regions",
    x = "Weeks",
    y = "Mean Price (in Hundred Thousands Euros)"
  )

ggplotly(plot_df)